Setup
#install.packages('rtrek')
#install.packages('ggsci')
library(rtrek)
library(tidyverse)
library(plotly)
library(ggsci)
# Read the per-episode data set from the working directory.
trek_data <- read_csv("trek_data.csv")
# Move the four series to the front of the factor levels, in this order,
# so that plots list them as TOS, TNG, DS9, VOY.
trek_data$Series <- trek_data$Series %>%
  fct_relevel(c("TOS", "TNG", "DS9", "VOY"))
1st Damn Fool Question:
Is there a difference in the distribution of number of characters per episode by series?
# Box plot of NChars (characters per episode), one box per series,
# coloured with the four-colour "uniform" Star Trek palette from ggsci.
p <- trek_data %>%
  plot_ly(
    y = ~NChars,
    color = ~Series,
    colors = pal_startrek("uniform")(4)
  ) %>%
  add_boxplot(x = ~Series) %>%
  # The x axis already names each series, so the legend is hidden.
  hide_legend() %>%
  layout(
    title = "Number of Characters Per Episode By Series",
    xaxis = list(title = "Series"),
    yaxis = list(title = "Number of Characters \n (per episode)")
  )
p
## Warning: Ignoring 1 observations
Proportion of Episodes in which the Characters Are at Least 20% Female, by Series
library(dplyr)
library(plotly)

# For each series, compute the proportion of episodes whose N_F / NChars
# ratio is at least `boundary`, then draw the two complementary proportions
# as grouped bars. This section reads only the Series, N_F and NChars
# columns of trek_data.csv.
df <- read.csv("trek_data.csv")

# Per-episode ratio, computed for the whole column at once. The ratio is
# NA/NaN when either count is missing or both counts are zero.
female_share <- df$N_F / df$NChars

# Non-missing ratios, kept under the original name; nothing in this
# section reads it.
m <- female_share[!is.na(female_share)]

boundary <- 0.2
Series <- c("TOS", "TNG", "DS9", "VOY")

# Proportion of episodes at or above the boundary, in the order of `Series`.
# Episodes with an undefined ratio are excluded from BOTH the numerator and
# the denominator; previously they stayed in the denominator and were
# therefore reported as "Less Than 20% Female".
prop1 <- vapply(
  Series,
  function(series_name) {
    rows <- which(df$Series == series_name)
    mean(female_share[rows] >= boundary, na.rm = TRUE)
  },
  numeric(1),
  USE.NAMES = FALSE
)
prop2 <- 1 - prop1
data <- data.frame(Series, prop1, prop2)

# width/height are passed to plot_ly() rather than layout(): specifying
# them in layout() is deprecated and raises a warning.
p <- plot_ly(
  data,
  x = ~Series,
  y = ~prop1,
  type = "bar",
  name = "At Least 20% Female",
  textfont = list(size = 13),
  width = 500,
  height = 500
) %>%
  add_trace(y = ~prop2, name = "Less Than 20% Female") %>%
  layout(
    autosize = FALSE,
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 10),
    title = "Proportion of Episodes in which \n the Characters Are at Least 20% Female, by Series \n",
    yaxis = list(title = "Proportion of Episodes"),
    barmode = "group"
  )
p